********************************************************************************
* 05_secondary_regressions.do
* Secondary Event Study Regressions for JHR Paper
* "Teacher Testing Standards and the New Teacher Pipeline"
* Law, Marks, and Stern
*
* Regressions:
*   - Placebo: Non-Education Enrollments (Figure 5A)
*   - Placebo: Non-Education Completions (Figure 5B)
*   - Placebo: Other Education Completions (Figure 5C)
*   - State Licenses Event Study (Figure 6)
*   - Teacher Shortages Event Study (Figure 7)
*
* Inputs:
*   - data/raw/composite_treatment/graduation_event_data.xlsx
*   - data/raw/composite_treatment/enrollments_event_data.xlsx
*   - data/raw/placebo/placebo_data.dta
*   - data/raw/placebo/placebo_enrollment_data.dta
*   - data/raw/policy/state_treatment.dta
*   - data/raw/licenses/stateyrlicensetradalt.dta
*   - data/raw/shortages/total_shortages_state_year.dta
*
* Outputs:
*   - output/tables/enrollments_placebo_event_study_total.csv
*   - output/tables/placebo_non_ed_completions.csv
*   - output/tables/placebo_other_ed_completions.csv
*   - output/tables/state_licenses_event_study_coefficients.csv
*   - output/tables/teacher_shortage_event_study.csv
*
* Bootstrap: reps(1000) seed(12345) cluster(State) throughout
********************************************************************************

clear all
set more off

display "========================================================"
display "05: Secondary Event Study Regressions"
display "========================================================"

capture mkdir "output"
capture mkdir "output/tables"

* Control variables
global state_demo "real_income unemployment_rate"
global state_kraft "passevals implementevals eliminate_tenure increase_probationary_period weaken_bargaining eliminate_union_dues won_race_top common_core edtpa"
global uni_controls "test_optional satpct2 actpct2 satvr25_2 satvr75_2 satmt25_2 satmt75_2 actcm25_2 actcm75_2 pell_percent2 pell_amount2 loan_percent2 loan_average2 enrollment_total2"
global all_controls "$state_demo $state_kraft $uni_controls"

* Helper program: write event study CSV from bootstrap results
capture program drop write_es_csv
program define write_es_csv
    args outcsv ref_year year_list

    matrix coefs = e(b)
    matrix V = e(V)

    tempname fh
    file open `fh' using "`outcsv'", write replace
    file write `fh' "year,estimate,std_error,conf_low,conf_high" _n

    local coef_idx = 1
    foreach yr of local year_list {
        if `yr' == `ref_year' {
            file write `fh' "`ref_year',0,0,0,0" _n
        }
        else {
            local b = coefs[1, `coef_idx']
            local se = sqrt(V[`coef_idx', `coef_idx'])
            local ci_lo = `b' - 1.96 * `se'
            local ci_hi = `b' + 1.96 * `se'
            file write `fh' "`yr'," %21.15f (`b') "," %21.15f (`se') "," %21.15f (`ci_lo') "," %21.15f (`ci_hi') _n
            local coef_idx = `coef_idx' + 1
        }
    }
    file close `fh'
end

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE 5B & 5C: GRADUATION PLACEBO EVENT STUDIES (annual)
* 5B: Non-Education Completions (l_all_other)
* 5C: Other Education Completions (l_nonkraft)
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "========================================================"
display "Figures 5B & 5C: Graduation Placebo Event Studies"
display "========================================================"

* Load graduation event data
import excel "data/raw/composite_treatment/graduation_event_data.xlsx", firstrow clear

* Merge with placebo data
merge 1:1 unitid year using "data/raw/placebo/placebo_data.dta", keep(match) nogenerate

* Drop excluded states
drop if State == "ND" | State == "TN"

* Fill forward test_index
sort unitid year
by unitid: replace test_index = test_index[_n-1] if test_index == .

* CT and SC adjustments
replace test_index = . if State == "CT" & year > 2017
replace test_index = -.6576 if State == "SC" & year > 2017

* Drop statefips — bootstrap idcluster(statefips) needs it as a NEW variable
capture drop statefips

display "  Graduation placebo data: " _N " obs"

* --- Figure 5B: Non-Education Completions ---
display ""
display "  Figure 5B: Non-Education Completions..."
bootstrap _b, reps(1000) seed(12345) cluster(State) idcluster(statefips): ///
    reghdfe l_all_other year_2009 year_2010 year_2011 year_2013 year_2014 ///
    year_2015 year_2016 year_2017 year_2018 year_2019 year_2020 ///
    $all_controls, absorb(unitid year)

* Write CSV
local years_grad "2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020"
write_es_csv "output/tables/placebo_non_ed_completions.csv" 2012 "`years_grad'"
display "  Saved: placebo_non_ed_completions.csv"

* --- Figure 5C: Other Education Completions ---
display ""
display "  Figure 5C: Other Education Completions..."
bootstrap _b, reps(1000) seed(12345) cluster(State) idcluster(statefips): ///
    reghdfe l_nonkraft year_2009 year_2010 year_2011 year_2013 year_2014 ///
    year_2015 year_2016 year_2017 year_2018 year_2019 year_2020 ///
    $all_controls, absorb(unitid year)

write_es_csv "output/tables/placebo_other_ed_completions.csv" 2012 "`years_grad'"
display "  Saved: placebo_other_ed_completions.csv"

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE 5A: ENROLLMENT PLACEBO EVENT STUDY (biennial)
* Non-Education Enrollments (l_non_ed_enrollment)
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "========================================================"
display "Figure 5A: Enrollment Placebo Event Study"
display "========================================================"

* Load enrollment event data
import excel "data/raw/composite_treatment/enrollments_event_data.xlsx", firstrow clear

* Merge with enrollment placebo data
merge 1:1 unitid year using "data/raw/placebo/placebo_enrollment_data.dta", keep(match) nogenerate

* Drop excluded states
drop if State == "ND" | State == "TN"

* Fill forward test_index
sort unitid year
by unitid: replace test_index = test_index[_n-1] if test_index == .

* CT and SC adjustments
replace test_index = . if State == "CT" & year > 2017
replace test_index = -.6576 if State == "SC" & year > 2017

* Drop statefips — bootstrap idcluster(statefips) needs it as a NEW variable
capture drop statefips

display "  Enrollment placebo data: " _N " obs"

bootstrap _b, reps(1000) seed(12345) cluster(State) idcluster(statefips): ///
    reghdfe l_non_ed_enrollment year_2008 year_2010 year_2014 year_2016 year_2018 ///
    $all_controls, absorb(unitid year)

local years_enroll "2008 2010 2012 2014 2016 2018"
write_es_csv "output/tables/enrollments_placebo_event_study_total.csv" 2012 "`years_enroll'"
display "  Saved: enrollments_placebo_event_study_total.csv"

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE 6: STATE LICENSES EVENT STUDY
* DV = log(licenses), FE = State + Year, State-level controls only
* Years: 2010-2019
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "========================================================"
display "Figure 6: State Licenses Event Study"
display "========================================================"

* Load state treatment data
use "data/raw/policy/state_treatment.dta", clear

* Drop excluded states
drop if State == "ND" | State == "TN"

* Fill forward test_index
sort State year
by State: replace test_index = test_index[_n-1] if test_index == .

* CT and SC adjustments
replace test_index = . if State == "CT" & year > 2017
replace test_index = -.6576 if State == "SC" & year > 2017

* Merge with license counts
merge 1:1 statename year using ///
    "data/raw/licenses/stateyrlicensetradalt.dta", ///
    keep(match) nogenerate

* Create log licenses
gen log_licenses = log(licenses)

* Restrict to 2010-2019
keep if year >= 2010 & year <= 2019

* Encode state for FE
encode State, gen(state_f)

display "  License data: " _N " obs"
quietly tab State
display "  States: " r(r)

* Event study: year_2010, year_2011, year_2013-year_2019 (ref=2012)
bootstrap _b, reps(1000) seed(12345) cluster(State) idcluster(state_id): ///
    reghdfe log_licenses year_2010 year_2011 year_2013 year_2014 ///
    year_2015 year_2016 year_2017 year_2018 year_2019 ///
    $state_demo $state_kraft, absorb(state_f year)

local years_lic "2010 2011 2012 2013 2014 2015 2016 2017 2018 2019"
write_es_csv "output/tables/state_licenses_event_study_coefficients.csv" 2012 "`years_lic'"
display "  Saved: state_licenses_event_study_coefficients.csv"

* ══════════════════════════════════════════════════════════════════════════════
* FIGURE 7: TEACHER SHORTAGES EVENT STUDY
* DV = ln_shortages, FE = State + Year, State-level controls only
* Years: 2009-2020
* ══════════════════════════════════════════════════════════════════════════════

display ""
display "========================================================"
display "Figure 7: Teacher Shortages Event Study"
display "========================================================"

* Load state treatment data
use "data/raw/policy/state_treatment.dta", clear

* Drop excluded states
drop if State == "ND" | State == "TN"

* Fill forward test_index
sort State year
by State: replace test_index = test_index[_n-1] if test_index == .

* CT and SC adjustments
replace test_index = . if State == "CT" & year > 2017
replace test_index = -.6576 if State == "SC" & year > 2017

* Merge with shortage data
merge 1:1 statename year using ///
    "data/raw/shortages/total_shortages_state_year.dta", ///
    keep(match) nogenerate

* Restrict to 2009-2020
keep if year >= 2009 & year <= 2020

* Drop missing ln_shortages
drop if ln_shortages == .

* Encode state for FE
encode State, gen(state_f)

display "  Shortage data: " _N " obs"
quietly tab State
display "  States: " r(r)

* Event study: year_2009-year_2011, year_2013-year_2020 (ref=2012)
bootstrap _b, reps(1000) seed(12345) cluster(State) idcluster(state_id): ///
    reghdfe ln_shortages year_2009 year_2010 year_2011 year_2013 year_2014 ///
    year_2015 year_2016 year_2017 year_2018 year_2019 year_2020 ///
    $state_demo $state_kraft, absorb(state_f year)

local years_short "2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020"
write_es_csv "output/tables/teacher_shortage_event_study.csv" 2012 "`years_short'"
display "  Saved: teacher_shortage_event_study.csv"

* ──────────────────────────────────────────────────────────────────────────────
* Summary
* ──────────────────────────────────────────────────────────────────────────────

display ""
display "========================================================"
display "05: Secondary Regressions Summary"
display "========================================================"
display "Created:"
display "  - Figure 5A: Non-Education Enrollments (biennial)"
display "  - Figure 5B: Non-Education Completions (annual)"
display "  - Figure 5C: Other Education Completions (annual)"
display "  - Figure 6: State Licenses (annual, 2010-2019)"
display "  - Figure 7: Teacher Shortages (annual, 2009-2020)"
display ""
display "05_secondary_regressions.do complete."
